#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
#          Test accounting for MPS resources with various allocation options
############################################################################
# Copyright (C) SchedMD LLC.
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
############################################################################
source ./globals

# Batch script handed to sbatch
set file_in1       "$test_dir/input1"
# Script launched through srun by the batch script in TEST 2
set file_in2       "$test_dir/input2"
# File receiving the batch job's output (sbatch -o)
set file_out       "$test_dir/output"
# ID of the most recently submitted job; 0 means no job has been submitted
set job_id         0

#
# Validate the job, batch step and step 0 of a job have the proper MPS counts
# No step to test if step_mps == -1
#
# NOTE: AllocTRES and ReqTRES values for all steps (including batch step)
#	are reported based upon the job specification
#
proc test_acct { job_id job_mps step_mps req_mps have_mps_types batch_mps } {
	global re_word_str number bin_cat bin_rm file_out sacct

	# Compare the MPS counts that sacct reports for a job against the
	# expected values.
	#
	# job_id:         job to inspect; nothing is checked when it is 0
	# job_mps:        count expected on the job record
	# step_mps:       count expected on step 0, or -1 to skip the step check
	# req_mps:        accepted for the callers' benefit, not read here
	# have_mps_types: true when MPS is reported with a type name
	# batch_mps:      count expected on the batch step record
	if {$job_id == 0} {
		return
	}

	log_debug "Job $job_id Expecting job MPS:$job_mps  Step MPS:$step_mps"

	# Job completion records reach the database immediately, whereas job
	# start records are queued and may arrive later. Hold off until ReqTRES
	# shows up for the job.
	wait_for_command_match -fail "$sacct -X -n -o ReqTRES --parsable2 -j $job_id" "gres/mps"

	# Index every sacct record by its JobID column, keeping the ReqTRES and
	# AllocTRES columns of each one
	set sacct_records [dict create]
	set sacct_output [run_command_output -fail "$sacct --job=$job_id --parsable2 --start=now-15minutes --format JobID,ReqTRES,AllocTRES --noheader"]
	foreach record [split $sacct_output "\n"] {
		lassign [split $record "|"] rec_id rec_req rec_alloc
		dict set sacct_records $rec_id [dict create ReqTRES $rec_req AllocTRES $rec_alloc]
	}

	# The same pattern is applied to every record: typed MPS carries a type
	# name between the colon and the equals sign, untyped MPS does not
	if {$have_mps_types} {
		set mps_regex {gres/mps:[^=]+=(\d+)}
	} else {
		set mps_regex {gres/mps=(\d+)}
	}

	# Step 0: every reported count must match, and exactly one is expected
	if {$step_mps != -1} {
		set step_key "$job_id.0"
		if {![dict exists $sacct_records $step_key]} {
			fail "sacct did not report a record for step $job_id.0"
		}
		set mps_reported_count 0
		foreach tres_field [dict values [dict get $sacct_records $step_key]] {
			foreach {{} mps_count} [regexp -all -inline $mps_regex $tres_field] {
				subtest {$mps_count == $step_mps} "Verify step MPS count reported by sacct" "$mps_count != $step_mps"
				incr mps_reported_count
			}
		}

		subtest {$mps_reported_count == 1} "sacct should report step MPS 1 time" "$mps_reported_count != 1"
	}

	# Batch step: every reported count must match, and exactly one is expected
	set batch_key "$job_id.batch"
	if {![dict exists $sacct_records $batch_key]} {
		fail "sacct did not report a record for job $job_id.batch"
	}
	set mps_reported_count 0
	foreach tres_field [dict values [dict get $sacct_records $batch_key]] {
		foreach {{} mps_count} [regexp -all -inline $mps_regex $tres_field] {
			subtest {$mps_count == $batch_mps} "Batch MPS reported by sacct should be $batch_mps" "$mps_count != $batch_mps"
			incr mps_reported_count
		}
	}
	subtest {$mps_reported_count == 1} "sacct should report batch MPS 1 time" "found $mps_reported_count times"

	# Job record: the count is expected twice, once per TRES column
	if {![dict exists $sacct_records $job_id]} {
		fail "sacct did not report a record for job $job_id"
	}
	set mps_reported_count 0
	foreach tres_field [dict values [dict get $sacct_records $job_id]] {
		foreach {{} mps_count} [regexp -all -inline $mps_regex $tres_field] {
			subtest {$mps_count == $job_mps} "Verify job MPS count reported by sacct" "$mps_count != $job_mps"
			incr mps_reported_count
		}
	}

	subtest {$mps_reported_count == 2} "sacct should report job MPS 2 times" "found $mps_reported_count times"
}

#
# Validate that the AllocTRES information found in a job output file reports
# the expected MPS count
#
# file_out: path of the output file to scan
# target:   expected MPS count
#
proc test_out_file { file_out target } {
	global re_word_str number bin_cat

	# Check the MPS count printed on the AllocTRES line of a job output file.
	#
	# file_out: path of the file to scan
	# target:   MPS count the file is expected to report
	#
	# The comparison result is reported through subtest.
	wait_for_file -fail $file_out

	# match keeps its initial value of 0 when no AllocTRES MPS entry is seen.
	# Both the untyped form (gres/mps=N) and the typed form (gres/mps:TYPE=N)
	# are accepted, and every new match overwrites the previous one.
	set match 0
	spawn $bin_cat $file_out
	expect {
		-re "AllocTRES=.*,gres/mps=($number)" {
			set match $expect_out(1,string)
			exp_continue
		}
		-re "AllocTRES=.*,gres/mps:($re_word_str)=($number)" {
			set match $expect_out(2,string)
			exp_continue
		}
		eof {
			wait
		}
	}
	subtest {$match == $target} "Verify MPS accounting" "$match != $target"
}

#
# Helper function to find $batch_mps from different outputs
#
proc get_batch_mps { file_out } {
	global bin_cat

	# Extract the MPS count allocated on the batch host from the job details
	# stored in file_out.
	#
	# Returns the count taken from the selected Nodes= line. When no count
	# can be extracted, fail is called and "unknown" is the value left to
	# return.
	set job_info [run_command_output -fail "$bin_cat $file_out"]
	set node_lines [regexp -all -line -inline {    Nodes=+.*} $job_info]

	if {[llength $node_lines] <= 1} {
		# Zero or one Nodes= line: use the first entry as is
		set batch_line [lindex $node_lines 0]
	} else {
		# The allocation is spread over several Nodes= lines, for example
		#  BatchHost=74dc179a_n1
		#  ...
		#  Nodes=74dc179a_n1 CPU_IDs=0-1 Mem=150 GRES=mps:2(IDX:0-1)
		#  Nodes=74dc179a_n2 CPU_IDs=0-1 Mem=150 GRES=mps:1(IDX:0)
		# Keep the first line that mentions the BatchHost node.
		set batch_host "unknown"
		regexp -all -line {BatchHost=(.*)} $job_info - batch_host
		set batch_line [lindex [lsearch -all -inline $node_lines *$batch_host*] 0]
	}

	# The count follows "mps:", optionally preceded by a type name and colon
	set batch_mps "unknown"
	if {[regexp {mps:(?:[^:( ]+:)?(\d+)} $batch_line - batch_mps]} {
		return $batch_mps
	}
	fail "Unable to get batch_mps"
	return $batch_mps
}

# Prerequisites: skip the test unless the configuration can account for MPS.

# gres/mps must be listed in AccountingStorageTRES (compared in lower case)
set store_tres [string tolower [get_config_param "AccountingStorageTRES"]]
set store_mps [string first "gres/mps" $store_tres]
if {$store_mps == -1} {
	skip "This test requires accounting for MPS"
} elseif {[get_config_param "FrontendName"] ne "MISSING"} {
	skip "This test is incompatible with front-end systems"
}

if {![check_config_select "cons_tres"]} {
	skip "This test is only compatible with select/cons_tres"
}

# Accounting records are read back with sacct in test_acct
if { [get_config_param "AccountingStorageType"] ne "accounting_storage/slurmdbd"} {
	skip "This test requires AccountingStorageType=slurmdbd"
}

# Number of nodes available to a two-node request asking for mps:100;
# the jobs below are submitted across that many nodes
set nb_nodes [llength [get_nodes_by_request "--gres=mps:100 -N2 -t2"]]
if { $nb_nodes == 0} {
	skip "This test requires being able to submit job with --gres=mps:100 -N2"
}

proc cleanup {} {
	# Cancel the job currently tracked in the global job_id variable
	cancel_job $::job_id
}

#
# Test --gres=mps option by job
#

log_info "TEST 1: --gres=mps option by job"

# The batch script only prints the detailed job record, which is parsed below
make_bash_script $file_in1 "
	$scontrol -dd show job \${SLURM_JOBID}
	exit 0"

set req_mps 49
# Expected job-wide MPS count: the requested count multiplied by the number
# of nodes. The expression is braced so it is substituted only once.
set target [expr {$nb_nodes * $req_mps}]
exec $bin_rm -f $file_out
set timeout $max_job_delay
spawn $sbatch --gres=craynetwork:0 --gres=mps:$req_mps -N$nb_nodes -t1 -o $file_out -J $test_name $file_in1
expect {
	-re "Submitted batch job ($number)" {
		set job_id $expect_out(1,string)
		exp_continue
	}
	timeout {
		fail "sbatch not responding"
	}
	eof {
		wait
	}
}
if {$job_id == 0} {
	fail "Job not submitted"
}

wait_for_job -fail $job_id "DONE"

wait_for_file -fail $file_out

set batch_mps [get_batch_mps $file_out]

# Read the MPS count from the AllocTRES line of the job record and note
# whether it is reported with a type name; have_mps_types is reused by TEST 2
set have_mps_types 0
set match 0
spawn $bin_cat $file_out
expect {
	-re "AllocTRES=.*,gres/mps=($number)" {
		set match $expect_out(1,string)
		exp_continue
	}
	-re "AllocTRES=.*,gres/mps:($re_word_str)=($number)" {
		if {$match == 0} {
			set have_mps_types 1
			set match $expect_out(2,string)
		}
		exp_continue
	}
	eof {
		wait
	}
}
if {$match != $target} {
	fail "Failed to account for proper MPS count ($match != $target)"
}

# No job step is launched by this script, hence step_mps is -1
test_acct $job_id $target -1 $req_mps $have_mps_types $batch_mps

#
# Test --gres=mps option by step
#
# The batch script launches a step running the second script
make_bash_script $file_in1 "
	$srun $file_in2
	exit 0"

# Only task 0 of the step prints the job and step records
make_bash_script $file_in2 "
	if \[ \$SLURM_PROCID -eq 0 \]; then
		$scontrol -dd show job \${SLURM_JOBID}
		$scontrol show step \${SLURM_JOBID}.\${SLURM_STEPID}
	fi
	exit 0"

log_info "TEST 2: --gres=mps option by step"

set req_mps 51
# Expected job-wide MPS count: the requested count multiplied by the number
# of nodes. The expression is braced so it is substituted only once.
set target [expr {$nb_nodes * $req_mps}]
exec $bin_rm -f $file_out
set job_id 0
set timeout $max_job_delay
spawn $sbatch --gres=craynetwork:0 --gres=mps:$req_mps -N$nb_nodes -t1 -o $file_out -J $test_name $file_in1
expect {
	-re "Submitted batch job ($number)" {
		set job_id $expect_out(1,string)
		exp_continue
	}
	timeout {
		fail "sbatch not responding"
	}
	eof {
		wait
	}
}
if {$job_id == 0} {
	fail "Job not submitted"
}

wait_for_job -fail $job_id "DONE"

# As in TEST 1, make sure the output file is present before it is read;
# get_batch_mps reads it with cat and fails if it is missing
wait_for_file -fail $file_out
set batch_mps [get_batch_mps $file_out]

test_out_file $file_out $target
# The step is expected to report the same count as the job
test_acct $job_id $target $target $req_mps $have_mps_types $batch_mps
